In [3]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import csv
import networkx as nx
import pylab as plt

In [17]:
# get meme_names
meme_name="biaoge"

results_path="/home/clemsos/Dev/mitras/results/"
meme_path=results_path+"/"+meme_name

Load Gephi edges data


In [29]:
# %pylab inline

meme_graph_csv=meme_path+"/"+meme_name+"_edges.csv"

with open(meme_graph_csv, 'rb') as edgefile:
    edgefile.next() # skip headers
    edgecsv=csv.reader(edgefile)
    
    edges=[ str(str(edge[0])+","+str(edge[1])+","+str(edge[2])) for edge in edgecsv]
    print len(edges)
    
    G = nx.parse_edgelist(edges, nodetype=str, delimiter=",",data=(('time',str),))
    
    print len(G.edges())
    print len(G.nodes())
    # nx.read_edgelist(,delimiter=",")


4471
3751
2545

Load d3 weighted edges data


In [12]:
meme_graph_csv=meme_path+"/"+meme_name+"_d3graph.csv"

# directed graph
# G = nx.DiGraph()

with open(meme_graph_csv, 'rb') as edgefile:
    edgefile.next() # skip headers
    edgecsv=csv.reader(edgefile)
    
    #edges=[ str(str(edge[0])+","+str(edge[1])+","+str(edge[2])) for edge in edgecsv]
    #print len(edges)
    #G = nx.parse_edgelist(edges, nodetype=str, delimiter=",",data=(('time',str),))
    
    G=nx.read_weighted_edgelist(edgefile, create_using=nx.DiGraph(), delimiter=",")
    
    print len(G.edges())
    print len(G.nodes())

# nx.draw(G)
# nx.draw_spectral(G)


233
237

In [43]:
N,K = G.order(), G.size()
avg_deg = float(K)/N
print "Nodes: ", N
print "Edges: ", K
print "Average degree: ", avg_deg


Nodes:  85
Edges:  70
Average degree:  0.823529411765

In [18]:
in_degrees = G.in_degree() # dictionary node:degree
in_values = sorted(set(in_degrees.values())) 
in_hist = [in_degrees.values().count(x) for x in in_values]

out_degrees = G.out_degree() # dictionary node:degree
out_values = sorted(set(in_degrees.values())) 
out_hist = [in_degrees.values().count(x) for x in in_values]

plt.figure()
plt.plot(in_values,in_hist,'ro-') # in-degree
plt.plot(out_values,out_hist,'bv-') # out-degree
plt.legend(['In-degree','Out-degree'])
plt.xlabel('Degree')
plt.ylabel('Number of nodes')
plt.title(meme_name+' network')
#plt.savefig('hartford_degree_distribution.pdf')
#plt.close()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-18-6d2ff5d59816> in <module>()
----> 1 in_degrees = G.in_degree() # dictionary node:degree
      2 in_values = sorted(set(in_degrees.values()))
      3 in_hist = [in_degrees.values().count(x) for x in in_values]
      4 
      5 plt.figure()

AttributeError: 'Graph' object has no attribute 'in_degree'

In [56]:
G_ud = G.to_undirected()
# Clustering coefficient of all nodes (in a dictionary)
clust_coefficients = nx.clustering(G_ud)
# Average clustering coefficient
ccs = nx.clustering(G_ud)
avg_clust = sum(ccs.values()) / len(ccs)
print avg_clust


{u'uVGJJRKDB': 0.0, u'uKPK5T5PS': 0.0, u'uK3R1EDJ1': 0.0, u'uLCMY22PK': 0.0, u'uRUHUYAX': 0.0, u'uHQYT3OAO': 0.0, u'uVEQD5MX3': 0.0, u'uB5N2ODWU': 0.0, u'uI43WRQGY': 0.0, u'uBIDXNTVP': 0.0, u'uTS23PWWL': 0.0, u'uVT4B0U20': 0.0, u'uKPKW5YCM': 0.0, u'uSIAL4MBT': 0.0, u'uVEQZY1N2': 0.0, u'uS5WYGVW4': 0.0, u'uFAG1C3O': 0.0, u'uP2ZKNHEX': 0.0, u'uZXKOY13H': 0.0, u'uJWIN1QKN': 0.0, u'uBK14MGUP': 0.0, u'uB4GXLJZH': 0.0, u'u02HXZUJK': 0.0, u'uKPKEQK2J': 0.0, u'uEMBQZTQC': 0.0, u'uFAGVDH3': 0.0, u'uB5NF4TLB': 0.0, u'u1CRN24CM': 1.0, u'uWWBH2A4W': 0.0, u'u351AVSWO': 0.0, u'uQSMA351V': 0.0, u'uGL2RIKX1': 0.0, u'uTZ5BQOYR': 0.0, u'uTS2DKHMG': 0.0, u'uPKFOANLF': 0.0, u'uR1W3KXBH': 0.0, u'uBIBYF2KZ': 0.3333333333333333, u'uVGJCR3IR': 0.0, u'u0VNXXKF1': 0.0, u'uLCMYA4GP': 0.0, u'uII5BNXYH': 0.0, u'uII5LYGGE': 0.0, u'uMLLDSR0L': 0.0, u'uRULR2YT': 0.0, u'u1O52QLAR': 0.0, u'uMLL1KZJC': 0.0, u'u1CRNXT4I': 0.0, u'uMMSL5GDJ': 0.0, u'uKBQFVJUT': 0.0, u'uK3X4JJE2': 0.0, u'uHQYOXKT3': 0.0, u'uPVDYBZB3': 0.0, u'uZQXIMVWY': 0.0, u'uPKFMOV0V': 0.0, u'uB4GUDVX5': 0.0, u'uUPCRA1WE': 0.0, u'uPVDSUTXM': 0.0, u'uP2ZOVACO': 0.0, u'uPVDYRJEN': 0.0, u'uS5WKZBT5': 0.0, u'uP2ZMQUZP': 0.0, u'uOQZ3FO1I': 0.0, u'uCBTS0OWI': 0.0, u'uOQZQMFSO': 0.0, u'uZQXIW4FL': 0.0, u'uTZ5DXNZB': 0.0, u'uTS2DMSCY': 0.0, u'u0YUP4D1O': 0.0, u'uATZOMZB2': 0.0, u'uVT4BZSNY': 0.0, u'u1CR2T0OF': 0.0, u'uTZ5JCZVP': 0.0, u'uB4H2KKQO': 0.0, u'uW0EQKD1M': 0.0, u'uQHCQVHRQ': 0.0, u'uZNJX5ZP5': 1.0, u'uZ322NE5R': 0.0, u'uVGJ1W22Z': 0.0, u'uW0EBULPM': 0.0, u'uK3RXEGSV': 0.0, u'u0VPMT21H': 0.0, u'u0YUPKQGI': 0.0, u'uHQYLSQD1': 0.0, u'uLCMT13JX': 0.0, u'uZD34MOXA': 0.0}
0.0274509803922

In [62]:
G_components = nx.connected_component_subgraphs(G.to_undirected())
G_mc = G_components[0]

# Betweenness centrality
bet_cen = nx.betweenness_centrality(G_mc)
# Closeness centrality
clo_cen = nx.closeness_centrality(G_mc)

# Eigenvector centrality
eig_cen = nx.eigenvector_centrality(G_mc)
# print "Eigenvector centrality : %f"%eig_cen
#print eig_cen


{u'uPKFMOV0V': 0.1945428742922252, u'uVGJJRKDB': 0.1945428742922252, u'uVT4B0U20': 0.3242381238203753, u'uZ322NE5R': 0.1945428742922252, u'uK3R1EDJ1': 0.1945428742922252, u'uS5WKZBT5': 0.25939049905630024, u'uTZ5BQOYR': 0.1945428742922252, u'uP2ZOVACO': 0.1945428742922252, u'uZQXIW4FL': 0.7749223370335141}

In [63]:
def highest_centrality(cent_dict): 
     """Returns a tuple (node,value) with the node 
    with largest value from Networkx centrality dictionary.""" 
     # Create ordered tuple of centrality data
     cent_items=[(b,a) for (a,b) in cent_dict.iteritems()]
     # Sort in descending order 
     cent_items.sort() 
     cent_items.reverse() 
     return tuple(reversed(cent_items[0]))

In [65]:
def centrality_scatter(dict1,dict2,path="",ylab="",xlab="",title="",line=False):
 # Create figure and drawing axis
 fig = plt.figure(figsize=(7,7))
 ax1 = fig.add_subplot(111)
 # Create items and extract centralities
 items1 = sorted(dict1.items())
 items2 = sorted(dict2.items())
 xdata=[b for a,b in items1]
 ydata=[b for a,b in items2] 
 
 # Add each actor to the plot by ID 
 for p in xrange(len(items1)):
    ax1.text(x=xdata[p], y=ydata[p],s=str(items1[p][0]), color="b")

    if line: 
        # use NumPy to calculate the best fit
        slope, yint = plt.polyfit(xdata,ydata,1)
        xline = plt.xticks()[0] 
        yline = map(lambda x: slope*x+yint,xline)
        ax1.plot(xline,yline,ls='--',color='b')
        # Set new x- and y-axis limits
        plt.xlim((0.0,max(xdata)+(.15*max(xdata))))
        plt.ylim((0.0,max(ydata)+(.15*max(ydata)))) 
        # Add labels and save
        ax1.set_title(title)
        ax1.set_xlabel(xlab) 
        ax1.set_ylabel(ylab) 
        # plt.savefig(path)

In [19]:
def calculate_degree_centrality(graph): 
    g = graph 
    dc = nx.degree_centrality(g) 
    nx.set_node_attributes(g,degree_cent,dc) 
    degcent_sorted = sorted(dc.items(), key=itemgetter(1), reverse=True) 
    for key,value in degcent_sorted[0:10]: 
        print "Highest degree Centrality:", key, value 
        return graph, dc

calculate_degree_centrality(G)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-d6ee7554d7a5> in <module>()
      8         return graph, dc
      9 
---> 10 calculate_degree_centrality(G)

<ipython-input-19-d6ee7554d7a5> in calculate_degree_centrality(graph)
      2     g = graph
      3     dc = nx.degree_centrality(g)
----> 4     nx.set_node_attributes(g,degree_cent,dc)
      5     degcent_sorted = sorted(dc.items(), key=itemgetter(1), reverse=True)
      6     for key,value in degcent_sorted[0:10]:

NameError: global name 'degree_cent' is not defined

In [69]:
nx.find_cliques(G.to_undirected())


Out[69]:
<generator object find_cliques at 0x3911f50>

In [73]:
import numpy, matplotlib
from scipy.cluster import hierarchy
from scipy.spatial import distance

g=G
path_length=nx.all_pairs_shortest_path_length(g)
n = len(g.nodes())
distances=numpy.zeros((n,n))

for u,p in path_length.iteritems():
	 for v,d in p.iteritems():
	 	 distances[int(u)-1][int(v)-1] = d
sd = distance.squareform(distances)

hier = hierarchy.average(sd)		 

# draw dendogram
hierarchy.dendrogram(hier)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-73-c2006c1cf9a3> in <module>()
     13 for u,p in path_length.iteritems():
     14          for v,d in p.iteritems():
---> 15                  distances[int(u)-1][int(v)-1] = d
     16 sd = distance.squareform(distances)

ValueError: invalid literal for int() with base 10: 'uZQXIW4FL'
{u'uZQXIW4FL': {u'uVGJJRKDB': 1, u'uK3R1EDJ1': 1, u'uP2ZOVACO': 1, u'uPKFMOV0V': 1, u'uTZ5BQOYR': 1, u'uZ322NE5R': 1, u'uZQXIW4FL': 0}, u'uVGJJRKDB': {u'uVGJJRKDB': 0}, u'uKPK5T5PS': {u'uKPK5T5PS': 0}, u'uK3R1EDJ1': {u'uK3R1EDJ1': 0}, u'uLCMY22PK': {u'uP2ZKNHEX': 1, u'uLCMY22PK': 0}, u'uRUHUYAX': {u'uRUHUYAX': 0}, u'uHQYT3OAO': {u'uHQYT3OAO': 0, u'uP2ZMQUZP': 1, u'uVEQD5MX3': 1}, u'uVEQD5MX3': {u'uVEQD5MX3': 0}, u'uB5N2ODWU': {u'uB5N2ODWU': 0, u'uKPKEQK2J': 1}, u'uI43WRQGY': {u'uI43WRQGY': 0}, u'uBIDXNTVP': {u'uBIDXNTVP': 0}, u'uTS23PWWL': {u'uTS23PWWL': 0, u'uVEQZY1N2': 1}, u'uVT4B0U20': {u'uVGJJRKDB': 2, u'uK3R1EDJ1': 2, u'uP2ZOVACO': 2, u'uPKFMOV0V': 2, u'uTZ5BQOYR': 2, u'uVT4B0U20': 0, u'uZ322NE5R': 2, u'uZQXIW4FL': 1}, u'uZ322NE5R': {u'uZ322NE5R': 0}, u'uKPKW5YCM': {u'uKPKW5YCM': 0}, u'uSIAL4MBT': {u'uSIAL4MBT': 0}, u'uMLL1KZJC': {u'uMLL1KZJC': 0}, u'uP2ZKNHEX': {u'uP2ZKNHEX': 0}, u'uFAG1C3O': {u'uFAG1C3O': 0, u'uTS2DMSCY': 1}, u'uOQZQMFSO': {u'uOQZQMFSO': 0}, u'uS5WYGVW4': {u'uS5WYGVW4': 0}, u'uZXKOY13H': {u'uZXKOY13H': 0}, u'uJWIN1QKN': {u'uW0EQKD1M': 1, u'uJWIN1QKN': 0}, u'uB4GXLJZH': {u'uB4GXLJZH': 0}, u'u02HXZUJK': {u'u02HXZUJK': 0}, u'uKPKEQK2J': {u'uKPKEQK2J': 0}, u'uEMBQZTQC': {u'uVGJCR3IR': 1, u'uPVDSUTXM': 1, u'uEMBQZTQC': 0}, u'uFAGVDH3': {u'uFAGVDH3': 0, u'uJWIN1QKN': 1, u'uW0EQKD1M': 2}, u'uB5NF4TLB': {u'uB5NF4TLB': 0, u'uKPKW5YCM': 1}, u'u1CRN24CM': {u'u1CRN24CM': 0}, u'u351AVSWO': {u'u351AVSWO': 0}, u'uQSMA351V': {u'uQSMA351V': 0}, u'uGL2RIKX1': {u'uGL2RIKX1': 0, u'uHQYT3OAO': 1, u'uVEQD5MX3': 2, u'uP2ZMQUZP': 2}, u'uTZ5BQOYR': {u'uTZ5BQOYR': 0}, u'uTS2DKHMG': {u'uTS2DKHMG': 0, u'uATZOMZB2': 1}, u'uPKFOANLF': {u'uPKFOANLF': 0}, u'uR1W3KXBH': {u'uR1W3KXBH': 0}, u'uBIBYF2KZ': {u'u1CRN24CM': 1, u'uBIBYF2KZ': 0}, u'uVGJCR3IR': {u'uVGJCR3IR': 0}, u'uZD34MOXA': {u'uZD34MOXA': 0}, u'u0VNXXKF1': {u'u0VNXXKF1': 0}, u'uII5BNXYH': {u'uII5BNXYH': 0}, u'uII5LYGGE': {u'uVEQZY1N2': 1, u'uII5LYGGE': 0}, u'uMLLDSR0L': {u'uMLLDSR0L': 0}, u'uRULR2YT': {u'uRULR2YT': 0}, u'u1O52QLAR': {u'u1O52QLAR': 0}, u'uVEQZY1N2': {u'uVEQZY1N2': 0}, u'u1CRNXT4I': {u'u1CRNXT4I': 0}, u'uMMSL5GDJ': {u'uMMSL5GDJ': 0, u'uZD34MOXA': 1}, u'uKBQFVJUT': {u'uVGJCR3IR': 2, u'uKBQFVJUT': 0, u'uPVDSUTXM': 2, u'uEMBQZTQC': 1}, u'uK3X4JJE2': {u'uLCMYA4GP': 1, u'uK3X4JJE2': 0}, u'uHQYOXKT3': {u'uHQYOXKT3': 0, u'uK3RXEGSV': 1}, u'uPVDYBZB3': {u'uPVDYBZB3': 0}, u'uZQXIMVWY': {u'uZQXIMVWY': 0}, u'uPKFMOV0V': {u'uPKFMOV0V': 0}, u'uB4GUDVX5': {u'uB4GUDVX5': 0, u'uI43WRQGY': 1}, u'uUPCRA1WE': {u'uSIAL4MBT': 1, u'uUPCRA1WE': 0}, u'uPVDSUTXM': {u'uPVDSUTXM': 0}, u'uP2ZOVACO': {u'uP2ZOVACO': 0}, u'uPVDYRJEN': {u'u1CRN24CM': 2, u'uPVDYRJEN': 0, u'uBIBYF2KZ': 1}, u'uS5WKZBT5': {u'uVGJJRKDB': 2, u'uK3R1EDJ1': 2, u'uP2ZOVACO': 2, u'uPKFMOV0V': 2, u'uTZ5BQOYR': 2, u'uZ322NE5R': 2, u'uS5WKZBT5': 0, u'uZQXIW4FL': 1}, u'uP2ZMQUZP': {u'uP2ZMQUZP': 0}, u'uOQZ3FO1I': {u'uOQZ3FO1I': 0}, u'uCBTS0OWI': {u'uPVDYBZB3': 1, u'uCBTS0OWI': 0}, u'uWWBH2A4W': {u'uWWBH2A4W': 0}, u'uBK14MGUP': {u'uOQZQMFSO': 1, u'uBK14MGUP': 0}, u'uTZ5DXNZB': {u'uMLLDSR0L': 1, u'uTZ5DXNZB': 0, u'uRULR2YT': 1}, u'uVGJ1W22Z': {u'uVGJ1W22Z': 0}, u'u0YUP4D1O': {u'u1CRNXT4I': 1, u'u0YUP4D1O': 0}, u'uTS2DMSCY': {u'uTS2DMSCY': 0}, u'uATZOMZB2': {u'uATZOMZB2': 0}, u'u1CR2T0OF': {u'uQSMA351V': 1, u'u1CR2T0OF': 0}, u'u0VPMT21H': {u'u0VPMT21H': 0, u'uBIDXNTVP': 1}, u'uVT4BZSNY': {u'uVT4BZSNY': 0}, u'uTZ5JCZVP': {u'uVT4BZSNY': 1, u'uTZ5JCZVP': 0}, u'uB4H2KKQO': {u'uB4H2KKQO': 0}, u'uW0EQKD1M': {u'uW0EQKD1M': 0}, u'uZNJX5ZP5': {u'uZNJX5ZP5': 0, u'u1CRN24CM': 1, u'uBIBYF2KZ': 1}, u'uHQYLSQD1': {u'uHQYLSQD1': 0, u'uMLL1KZJC': 1}, u'uLCMYA4GP': {u'uLCMYA4GP': 0}, u'uW0EBULPM': {u'uW0EBULPM': 0}, u'uK3RXEGSV': {u'uK3RXEGSV': 0}, u'uQHCQVHRQ': {u'uQHCQVHRQ': 0}, u'uLCMT13JX': {u'uLCMT13JX': 0, u'uZXKOY13H': 1}, u'u0YUPKQGI': {u'u351AVSWO': 1, u'u0YUPKQGI': 0}}

In [16]:
%pylab inline
import community

G=G.to_undirected()
partition = community.best_partition(G)
# print partition

size = float(len(set(partition.values())))
pos = nx.spring_layout(G)

count = 0.
for com in set(partition.values()) :
    count = count + 1.
    list_nodes = [nodes for nodes in partition.keys()
                                if partition[nodes] == com]
    nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20,
                                node_color = str(count / size))


# nx.draw_networkx_edges(G,pos, alpha=0.5)
nx.draw
plt.show()


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['size', 'plt']
`%pylab --no-import-all` prevents importing * from pylab and numpy

In [ ]:
# utility function to let you print the node + various attributes in a csv format 
def write_node_attributes(graph, attributes):
    if type(attributes) is not list: attributes = [attributes] 
        for node in graph.nodes(): 
            vals = [str(dict[node]) for dict in [nx.get_node_attributes(graph,x) for x in attributes]] 
            print node, ",", ",".join(vals)